{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "faca165c6b3ca063",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Notebook-wide imports. This must be a code cell (not raw) so that `pd` is defined on Restart & Run All.\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "16360f9c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load one sheet of the tag-metadata workbook; dtype=str reads every column as text.\n",
    "df = pd.read_excel(\n",
    "    './data/企业标签元数据v1.2.xlsx',\n",
    "    sheet_name='生成企业现有标签',\n",
    "    dtype=str,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "c45cb608",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>TAG_NAME</th>\n",
       "      <th>TAG_COMMENT</th>\n",
       "      <th>EFFECTIVE_DATE</th>\n",
       "      <th>INVALID_DATE</th>\n",
       "      <th>DATA_TYPE</th>\n",
       "      <th>BUSINESS_DESC</th>\n",
       "      <th>TECH_DESC</th>\n",
       "      <th>TAG_TYPE</th>\n",
       "      <th>IS_AUTHORITY</th>\n",
       "      <th>...</th>\n",
       "      <th>TABLE_NAME</th>\n",
       "      <th>TABLE_COMMENT</th>\n",
       "      <th>REPOSITORY</th>\n",
       "      <th>DOMAIN</th>\n",
       "      <th>OWNER</th>\n",
       "      <th>CREATE_DATE</th>\n",
       "      <th>IS_VALID</th>\n",
       "      <th>REMARK</th>\n",
       "      <th>RANK</th>\n",
       "      <th>VERSION</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0097</td>\n",
       "      <td>IS_TORCH_PLAN</td>\n",
       "      <td>国家火炬计划重点高新技术企业</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“国家火...</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...</td>\n",
       "      <td>97</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A0098</td>\n",
       "      <td>IS_KJSSWS</td>\n",
       "      <td>会计师事务所</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A0099</td>\n",
       "      <td>IS_KJZXJG</td>\n",
       "      <td>科技咨询机构</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A0100</td>\n",
       "      <td>IS_NSQZNXF</td>\n",
       "      <td>南山区智能消防工程技术研究中心</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“南山区...</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...</td>\n",
       "      <td>100</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A0101</td>\n",
       "      <td>IS_SZSZNHYZZYCXZX</td>\n",
       "      <td>深圳市智能海洋制造业创新中心</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>101</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>187</th>\n",
       "      <td>A0092</td>\n",
       "      <td>IS_JSFW</td>\n",
       "      <td>技术服务</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>188</th>\n",
       "      <td>A0093</td>\n",
       "      <td>IS_NZQY</td>\n",
       "      <td>内资企业</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>93</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>189</th>\n",
       "      <td>A0094</td>\n",
       "      <td>IS_TOP500_SZGY</td>\n",
       "      <td>深圳市工业五百强企业</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“深圳市...</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...</td>\n",
       "      <td>94</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>190</th>\n",
       "      <td>A0095</td>\n",
       "      <td>IS_ZGMGHZ</td>\n",
       "      <td>中国美国合作</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>191</th>\n",
       "      <td>A0096</td>\n",
       "      <td>IS_GYLWLQY</td>\n",
       "      <td>供应链物流企业</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>192 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        ID           TAG_NAME      TAG_COMMENT EFFECTIVE_DATE  \\\n",
       "0    A0097      IS_TORCH_PLAN   国家火炬计划重点高新技术企业     2020/12/27   \n",
       "1    A0098          IS_KJSSWS           会计师事务所     2020/12/27   \n",
       "2    A0099          IS_KJZXJG           科技咨询机构     2020/12/27   \n",
       "3    A0100         IS_NSQZNXF  南山区智能消防工程技术研究中心     2020/12/27   \n",
       "4    A0101  IS_SZSZNHYZZYCXZX   深圳市智能海洋制造业创新中心     2020/12/27   \n",
       "..     ...                ...              ...            ...   \n",
       "187  A0092            IS_JSFW             技术服务     2020/12/27   \n",
       "188  A0093            IS_NZQY             内资企业     2020/12/27   \n",
       "189  A0094     IS_TOP500_SZGY       深圳市工业五百强企业     2020/12/27   \n",
       "190  A0095          IS_ZGMGHZ           中国美国合作     2020/12/27   \n",
       "191  A0096         IS_GYLWLQY          供应链物流企业     2020/12/27   \n",
       "\n",
       "            INVALID_DATE DATA_TYPE  \\\n",
       "0    2120/12/27 23:59:59    NUMBER   \n",
       "1    2120/12/27 23:59:59    NUMBER   \n",
       "2    2120/12/27 23:59:59    NUMBER   \n",
       "3    2120/12/27 23:59:59    NUMBER   \n",
       "4    2120/12/27 23:59:59    NUMBER   \n",
       "..                   ...       ...   \n",
       "187  2120/12/27 23:59:59    NUMBER   \n",
       "188  2120/12/27 23:59:59    NUMBER   \n",
       "189  2120/12/27 23:59:59    NUMBER   \n",
       "190  2120/12/27 23:59:59    NUMBER   \n",
       "191  2120/12/27 23:59:59    NUMBER   \n",
       "\n",
       "                                         BUSINESS_DESC  \\\n",
       "0    组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“国家火...   \n",
       "1                                                  NaN   \n",
       "2                                                  NaN   \n",
       "3    组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“南山区...   \n",
       "4                                                  NaN   \n",
       "..                                                 ...   \n",
       "187                                                NaN   \n",
       "188                                                NaN   \n",
       "189  组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“深圳市...   \n",
       "190                                                NaN   \n",
       "191                                                NaN   \n",
       "\n",
       "                                             TECH_DESC TAG_TYPE IS_AUTHORITY  \\\n",
       "0    SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...       B1          NaN   \n",
       "1    SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN          NaN   \n",
       "2    SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN          NaN   \n",
       "3    SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...       B1          NaN   \n",
       "4    SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN          NaN   \n",
       "..                                                 ...      ...          ...   \n",
       "187  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN          NaN   \n",
       "188  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN          NaN   \n",
       "189  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...       B1          NaN   \n",
       "190  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN          NaN   \n",
       "191  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN          NaN   \n",
       "\n",
       "     ...                                         TABLE_NAME TABLE_COMMENT  \\\n",
       "0    ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "1    ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "2    ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "3    ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "4    ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "..   ...                                                ...           ...   \n",
       "187  ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "188  ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "189  ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "190  ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "191  ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "\n",
       "      REPOSITORY DOMAIN OWNER         CREATE_DATE IS_VALID  \\\n",
       "0    DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "1    DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "2    DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "3    DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "4    DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "..           ...    ...   ...                 ...      ...   \n",
       "187  DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "188  DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "189  DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "190  DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "191  DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "\n",
       "                                                REMARK RANK VERSION  \n",
       "0    南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...   97    V1.2  \n",
       "1                                                  NaN   98    V1.2  \n",
       "2                                                  NaN   99    V1.2  \n",
       "3    南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...  100    V1.2  \n",
       "4                                                  NaN  101    V1.2  \n",
       "..                                                 ...  ...     ...  \n",
       "187                                                NaN   92    V1.2  \n",
       "188                                                NaN   93    V1.2  \n",
       "189  南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...   94    V1.2  \n",
       "190                                                NaN   95    V1.2  \n",
       "191                                                NaN   96    V1.2  \n",
       "\n",
       "[192 rows x 22 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cbb846f1",
   "metadata": {},
   "source": [
    "自己创建一个DataFrame结构"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5e34b9a1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>population</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>aaa</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>bbb</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ccc</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  country  population\n",
       "0     aaa          10\n",
       "1     bbb          12\n",
       "2     ccc          14"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build a small frame from a dict: keys become column names, lists become the column values.\n",
    "data = dict(\n",
    "    country=['aaa', 'bbb', 'ccc'],\n",
    "    population=[10, 12, 14],\n",
    ")\n",
    "df_data = pd.DataFrame(data)\n",
    "df_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "836f2aca",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 3 entries, 0 to 2\n",
      "Data columns (total 2 columns):\n",
      " #   Column      Non-Null Count  Dtype \n",
      "---  ------      --------------  ----- \n",
      " 0   country     3 non-null      object\n",
      " 1   population  3 non-null      int64 \n",
      "dtypes: int64(1), object(1)\n",
      "memory usage: 180.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "df_data.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "77deb665",
   "metadata": {},
   "source": [
    "按列名取出指定列的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9d90da16",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    A0097\n",
       "1    A0098\n",
       "2    A0099\n",
       "3    A0100\n",
       "4    A0101\n",
       "Name: ID, dtype: object"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pull the ID column out of the frame and preview its first five values.\n",
    "# NOTE(review): `id` shadows the Python built-in; the name is kept because the following cells use it.\n",
    "id = df.loc[:, 'ID']\n",
    "id.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c668ed99",
   "metadata": {},
   "source": [
    "series:dataframe中的一部分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "916f9858",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=192, step=1)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "id.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "267f3480",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['A0097', 'A0098', 'A0099', 'A0100', 'A0101', 'A0102', 'A0103',\n",
       "       'A0104', 'A0105', 'A0106', 'A0107', 'A0108', 'A0109', 'A0110',\n",
       "       'A0111', 'A0112', 'A0113', 'A0114', 'A0115', 'A0116', 'A0117',\n",
       "       'A0118', 'A0119', 'A0120', 'A0121', 'A0122', 'A0123', 'A0124',\n",
       "       'A0125', 'A0126', 'A0127', 'A0128', 'A0129', 'A0130', 'A0131',\n",
       "       'A0132', 'A0133', 'A0134', 'A0135', 'A0136', 'A0137', 'A0138',\n",
       "       'A0139', 'A0140', 'A0141', 'A0142', 'A0143', 'A0144', 'A0145',\n",
       "       'A0146', 'A0147', 'A0148', 'A0149', 'A0150', 'A0151', 'A0152',\n",
       "       'A0153', 'A0154', 'A0155', 'A0156', 'A0157', 'A0158', 'A0159',\n",
       "       'A0160', 'A0161', 'A0162', 'A0163', 'A0164', 'A0165', 'A0166',\n",
       "       'A0167', 'A0168', 'A0169', 'A0170', 'A0171', 'A0172', 'A0173',\n",
       "       'A0174', 'A0175', 'A0176', 'A0177', 'A0178', 'A0179', 'A0180',\n",
       "       'A0181', 'A0182', 'A0183', 'A0184', 'A0185', 'A0186', 'A0187',\n",
       "       'A0188', 'A0189', 'A0190', 'A0191', 'A0192', 'A0193', 'A0001',\n",
       "       'A0003', 'A0004', 'A0005', 'A0006', 'A0007', 'A0008', 'A0009',\n",
       "       'A0010', 'A0011', 'A0012', 'A0013', 'A0014', 'A0015', 'A0016',\n",
       "       'A0017', 'A0018', 'A0019', 'A0020', 'A0021', 'A0022', 'A0023',\n",
       "       'A0024', 'A0025', 'A0026', 'A0027', 'A0028', 'A0029', 'A0030',\n",
       "       'A0031', 'A0032', 'A0033', 'A0034', 'A0035', 'A0036', 'A0037',\n",
       "       'A0038', 'A0039', 'A0040', 'A0041', 'A0042', 'A0043', 'A0044',\n",
       "       'A0045', 'A0046', 'A0047', 'A0048', 'A0049', 'A0050', 'A0051',\n",
       "       'A0052', 'A0053', 'A0054', 'A0055', 'A0056', 'A0057', 'A0058',\n",
       "       'A0059', 'A0060', 'A0061', 'A0062', 'A0063', 'A0064', 'A0065',\n",
       "       'A0066', 'A0067', 'A0068', 'A0069', 'A0070', 'A0071', 'A0072',\n",
       "       'A0073', 'A0074', 'A0075', 'A0076', 'A0077', 'A0078', 'A0079',\n",
       "       'A0080', 'A0081', 'A0082', 'A0083', 'A0084', 'A0085', 'A0086',\n",
       "       'A0087', 'A0088', 'A0089', 'A0090', 'A0091', 'A0092', 'A0093',\n",
       "       'A0094', 'A0095', 'A0096'], dtype=object)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "id.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "4a12960c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>TAG_NAME</th>\n",
       "      <th>TAG_COMMENT</th>\n",
       "      <th>EFFECTIVE_DATE</th>\n",
       "      <th>INVALID_DATE</th>\n",
       "      <th>DATA_TYPE</th>\n",
       "      <th>BUSINESS_DESC</th>\n",
       "      <th>TECH_DESC</th>\n",
       "      <th>TAG_TYPE</th>\n",
       "      <th>IS_AUTHORITY</th>\n",
       "      <th>...</th>\n",
       "      <th>TABLE_NAME</th>\n",
       "      <th>TABLE_COMMENT</th>\n",
       "      <th>REPOSITORY</th>\n",
       "      <th>DOMAIN</th>\n",
       "      <th>OWNER</th>\n",
       "      <th>CREATE_DATE</th>\n",
       "      <th>IS_VALID</th>\n",
       "      <th>REMARK</th>\n",
       "      <th>RANK</th>\n",
       "      <th>VERSION</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0097</td>\n",
       "      <td>IS_TORCH_PLAN</td>\n",
       "      <td>国家火炬计划重点高新技术企业</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“国家火...</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...</td>\n",
       "      <td>97</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A0098</td>\n",
       "      <td>IS_KJSSWS</td>\n",
       "      <td>会计师事务所</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A0099</td>\n",
       "      <td>IS_KJZXJG</td>\n",
       "      <td>科技咨询机构</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A0100</td>\n",
       "      <td>IS_NSQZNXF</td>\n",
       "      <td>南山区智能消防工程技术研究中心</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“南山区...</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...</td>\n",
       "      <td>100</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A0101</td>\n",
       "      <td>IS_SZSZNHYZZYCXZX</td>\n",
       "      <td>深圳市智能海洋制造业创新中心</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>101</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      ID           TAG_NAME      TAG_COMMENT EFFECTIVE_DATE  \\\n",
       "0  A0097      IS_TORCH_PLAN   国家火炬计划重点高新技术企业     2020/12/27   \n",
       "1  A0098          IS_KJSSWS           会计师事务所     2020/12/27   \n",
       "2  A0099          IS_KJZXJG           科技咨询机构     2020/12/27   \n",
       "3  A0100         IS_NSQZNXF  南山区智能消防工程技术研究中心     2020/12/27   \n",
       "4  A0101  IS_SZSZNHYZZYCXZX   深圳市智能海洋制造业创新中心     2020/12/27   \n",
       "\n",
       "          INVALID_DATE DATA_TYPE  \\\n",
       "0  2120/12/27 23:59:59    NUMBER   \n",
       "1  2120/12/27 23:59:59    NUMBER   \n",
       "2  2120/12/27 23:59:59    NUMBER   \n",
       "3  2120/12/27 23:59:59    NUMBER   \n",
       "4  2120/12/27 23:59:59    NUMBER   \n",
       "\n",
       "                                       BUSINESS_DESC  \\\n",
       "0  组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“国家火...   \n",
       "1                                                NaN   \n",
       "2                                                NaN   \n",
       "3  组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“南山区...   \n",
       "4                                                NaN   \n",
       "\n",
       "                                           TECH_DESC TAG_TYPE IS_AUTHORITY  \\\n",
       "0  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...       B1          NaN   \n",
       "1  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN          NaN   \n",
       "2  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN          NaN   \n",
       "3  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...       B1          NaN   \n",
       "4  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN          NaN   \n",
       "\n",
       "   ...                                         TABLE_NAME TABLE_COMMENT  \\\n",
       "0  ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "1  ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "2  ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "3  ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "4  ...  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...        企业基本信息   \n",
       "\n",
       "    REPOSITORY DOMAIN OWNER         CREATE_DATE IS_VALID  \\\n",
       "0  DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "1  DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "2  DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "3  DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "4  DATASUMMARY     机构   李从志  2021/1/30 15:37:43        1   \n",
       "\n",
       "                                              REMARK RANK VERSION  \n",
       "0  南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...   97    V1.2  \n",
       "1                                                NaN   98    V1.2  \n",
       "2                                                NaN   99    V1.2  \n",
       "3  南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...  100    V1.2  \n",
       "4                                                NaN  101    V1.2  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "64d1df6e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    A0097\n",
       "1    A0098\n",
       "2    A0099\n",
       "3    A0100\n",
       "4    A0101\n",
       "Name: ID, dtype: object"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First five IDs; at this point the rows carry integer labels 0-4.\n",
    "df['ID'].head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "5db91871",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>TAG_COMMENT</th>\n",
       "      <th>EFFECTIVE_DATE</th>\n",
       "      <th>INVALID_DATE</th>\n",
       "      <th>DATA_TYPE</th>\n",
       "      <th>BUSINESS_DESC</th>\n",
       "      <th>TECH_DESC</th>\n",
       "      <th>TAG_TYPE</th>\n",
       "      <th>IS_AUTHORITY</th>\n",
       "      <th>CYCLE</th>\n",
       "      <th>...</th>\n",
       "      <th>TABLE_NAME</th>\n",
       "      <th>TABLE_COMMENT</th>\n",
       "      <th>REPOSITORY</th>\n",
       "      <th>DOMAIN</th>\n",
       "      <th>OWNER</th>\n",
       "      <th>CREATE_DATE</th>\n",
       "      <th>IS_VALID</th>\n",
       "      <th>REMARK</th>\n",
       "      <th>RANK</th>\n",
       "      <th>VERSION</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TAG_NAME</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>IS_TORCH_PLAN</th>\n",
       "      <td>A0097</td>\n",
       "      <td>国家火炬计划重点高新技术企业</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“国家火...</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...</td>\n",
       "      <td>97</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IS_KJSSWS</th>\n",
       "      <td>A0098</td>\n",
       "      <td>会计师事务所</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IS_KJZXJG</th>\n",
       "      <td>A0099</td>\n",
       "      <td>科技咨询机构</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IS_NSQZNXF</th>\n",
       "      <td>A0100</td>\n",
       "      <td>南山区智能消防工程技术研究中心</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“南山区...</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...</td>\n",
       "      <td>100</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IS_SZSZNHYZZYCXZX</th>\n",
       "      <td>A0101</td>\n",
       "      <td>深圳市智能海洋制造业创新中心</td>\n",
       "      <td>2020/12/27</td>\n",
       "      <td>2120/12/27 23:59:59</td>\n",
       "      <td>NUMBER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...</td>\n",
       "      <td>企业基本信息</td>\n",
       "      <td>DATASUMMARY</td>\n",
       "      <td>机构</td>\n",
       "      <td>李从志</td>\n",
       "      <td>2021/1/30 15:37:43</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>101</td>\n",
       "      <td>V1.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      ID      TAG_COMMENT EFFECTIVE_DATE         INVALID_DATE  \\\n",
       "TAG_NAME                                                                        \n",
       "IS_TORCH_PLAN      A0097   国家火炬计划重点高新技术企业     2020/12/27  2120/12/27 23:59:59   \n",
       "IS_KJSSWS          A0098           会计师事务所     2020/12/27  2120/12/27 23:59:59   \n",
       "IS_KJZXJG          A0099           科技咨询机构     2020/12/27  2120/12/27 23:59:59   \n",
       "IS_NSQZNXF         A0100  南山区智能消防工程技术研究中心     2020/12/27  2120/12/27 23:59:59   \n",
       "IS_SZSZNHYZZYCXZX  A0101   深圳市智能海洋制造业创新中心     2020/12/27  2120/12/27 23:59:59   \n",
       "\n",
       "                  DATA_TYPE  \\\n",
       "TAG_NAME                      \n",
       "IS_TORCH_PLAN        NUMBER   \n",
       "IS_KJSSWS            NUMBER   \n",
       "IS_KJZXJG            NUMBER   \n",
       "IS_NSQZNXF           NUMBER   \n",
       "IS_SZSZNHYZZYCXZX    NUMBER   \n",
       "\n",
       "                                                       BUSINESS_DESC  \\\n",
       "TAG_NAME                                                               \n",
       "IS_TORCH_PLAN      组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“国家火...   \n",
       "IS_KJSSWS                                                        NaN   \n",
       "IS_KJZXJG                                                        NaN   \n",
       "IS_NSQZNXF         组织机构代码、单位名称匹配（表中统一社会信用代码和注册号值混乱相间），且单位特性包含“南山区...   \n",
       "IS_SZSZNHYZZYCXZX                                                NaN   \n",
       "\n",
       "                                                           TECH_DESC TAG_TYPE  \\\n",
       "TAG_NAME                                                                        \n",
       "IS_TORCH_PLAN      SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...       B1   \n",
       "IS_KJSSWS          SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN   \n",
       "IS_KJZXJG          SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN   \n",
       "IS_NSQZNXF         SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...       B1   \n",
       "IS_SZSZNHYZZYCXZX  SELECT DISTINCT TYSHXYDM, ZZJGDM, ZCH, QYMC,1 ...      NaN   \n",
       "\n",
       "                  IS_AUTHORITY CYCLE  ...  \\\n",
       "TAG_NAME                              ...   \n",
       "IS_TORCH_PLAN              NaN   NaN  ...   \n",
       "IS_KJSSWS                  NaN   NaN  ...   \n",
       "IS_KJZXJG                  NaN   NaN  ...   \n",
       "IS_NSQZNXF                 NaN   NaN  ...   \n",
       "IS_SZSZNHYZZYCXZX          NaN   NaN  ...   \n",
       "\n",
       "                                                          TABLE_NAME  \\\n",
       "TAG_NAME                                                               \n",
       "IS_TORCH_PLAN      DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...   \n",
       "IS_KJSSWS          DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...   \n",
       "IS_KJZXJG          DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...   \n",
       "IS_NSQZNXF         DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...   \n",
       "IS_SZSZNHYZZYCXZX  DATASUMMARY.ORG_BASIC_INFO,LEGAL_ORG_TAG_MERGE...   \n",
       "\n",
       "                  TABLE_COMMENT   REPOSITORY DOMAIN OWNER         CREATE_DATE  \\\n",
       "TAG_NAME                                                                        \n",
       "IS_TORCH_PLAN            企业基本信息  DATASUMMARY     机构   李从志  2021/1/30 15:37:43   \n",
       "IS_KJSSWS                企业基本信息  DATASUMMARY     机构   李从志  2021/1/30 15:37:43   \n",
       "IS_KJZXJG                企业基本信息  DATASUMMARY     机构   李从志  2021/1/30 15:37:43   \n",
       "IS_NSQZNXF               企业基本信息  DATASUMMARY     机构   李从志  2021/1/30 15:37:43   \n",
       "IS_SZSZNHYZZYCXZX        企业基本信息  DATASUMMARY     机构   李从志  2021/1/30 15:37:43   \n",
       "\n",
       "                  IS_VALID                                             REMARK  \\\n",
       "TAG_NAME                                                                        \n",
       "IS_TORCH_PLAN            1  南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...   \n",
       "IS_KJSSWS                1                                                NaN   \n",
       "IS_KJZXJG                1                                                NaN   \n",
       "IS_NSQZNXF               1  南山企服的企业信息表尽量使用统代、注册号或者组织机构代码匹配，因为他的企业名称比较不规范，存...   \n",
       "IS_SZSZNHYZZYCXZX        1                                                NaN   \n",
       "\n",
       "                  RANK VERSION  \n",
       "TAG_NAME                        \n",
       "IS_TORCH_PLAN       97    V1.2  \n",
       "IS_KJSSWS           98    V1.2  \n",
       "IS_KJZXJG           99    V1.2  \n",
       "IS_NSQZNXF         100    V1.2  \n",
       "IS_SZSZNHYZZYCXZX  101    V1.2  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-index the tag table by TAG_NAME. set_index moves the column into the index,\n",
    "# so a second run of a bare set_index call would raise KeyError; the guard keeps\n",
    "# this cell safe to re-run without restarting the kernel.\n",
    "if df.index.name != 'TAG_NAME':\n",
    "    df = df.set_index('TAG_NAME')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "219ea374",
   "metadata": {},
   "source": [
    "索引可以自己指定：上面用 `set_index('TAG_NAME')` 把 TAG_NAME 列设为索引，下面再取 `df['ID']` 时，行标签就从 0、1、2… 变成了 TAG_NAME 的取值。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "905cf197",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "TAG_NAME\n",
       "IS_TORCH_PLAN        A0097\n",
       "IS_KJSSWS            A0098\n",
       "IS_KJZXJG            A0099\n",
       "IS_NSQZNXF           A0100\n",
       "IS_SZSZNHYZZYCXZX    A0101\n",
       "Name: ID, dtype: object"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First five IDs again; the rows are now labelled by TAG_NAME.\n",
    "df['ID'].head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6a75d88",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
